{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Event Recommendation Engine Challenge聚类\n",
    "思路：\n",
    "1、读取之前已存的数据模型\n",
    "2、从event.csv中筛选出对应的条目并保存。event.csv数据量较大，不适合一次性读入，\n",
    "3、对所选的event的101个关键词属性做聚类。由于样本数目较多，建议使用MiniBatchKMeans。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 导入必要的工具包"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import _pickle as cPickle\n",
    "from sklearn.cluster import MiniBatchKMeans\n",
    "from sklearn.preprocessing import normalize\n",
    "from sklearn import metrics\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "13418\n"
     ]
    }
   ],
   "source": [
    "# 读取之前已存的数据模型 \"PE_eventIndex.pkl\"\n",
    "eventIndex = cPickle.load(open(\"PE_eventIndex.pkl\", 'rb'))\n",
    "print(len(eventIndex))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "13418\n"
     ]
    }
   ],
   "source": [
    "# 读取events.csv文件数据。因该文件较大，所以分行读取 9-109\n",
    "eventCLists = []\n",
    "with open(\"events.csv\",'rb') as eventCsv:\n",
    "    eventCsv.readline()\n",
    "    for line in eventCsv: \n",
    "        cols = line.strip().split(b\",\") #在字符前加上b，直接指定这个\",\"是bytes类型\n",
    "#         cols[0] #event_id\n",
    "        if(cols[0] in eventIndex):  # 项目数据索引中包含该项目，选取此行的关键字\n",
    "#             print(cols[0],eventIndex[cols[0]])\n",
    "            eventCList = []\n",
    "            for i in range(9,110):\n",
    "                eventCList.append(int(cols[i]))\n",
    "            eventCLists.append(eventCList)\n",
    "    # 筛选完毕，保存数据\n",
    "    cPickle.dump(eventCLists, open(\"PE_eventCLists.pkl\", 'wb'))\n",
    "#         i = i +1\n",
    "#         if(i>10):\n",
    "#             break\n",
    "\n",
    "print(len(eventCLists))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[0.20851441 0.         0.20851441 ... 0.         0.         0.93831486]\n",
      " [0.24806947 0.         0.24806947 ... 0.         0.         0.86824314]\n",
      " [0.         0.         0.         ... 0.         0.         0.98639392]\n",
      " ...\n",
      " [0.         0.09853293 0.         ... 0.         0.         0.98532928]\n",
      " [0.         0.         0.         ... 0.         0.         0.98639392]\n",
      " [0.04504408 0.00750735 0.01501469 ... 0.         0.         0.99096977]]\n"
     ]
    }
   ],
   "source": [
    "# 特征标准化\n",
    "#词频，可以考虑我们用这部分特征进行聚类，得到活动的genre\n",
    "eventContMatrix = normalize(eventCLists, norm=\"l2\", copy=False)\n",
    "print(eventContMatrix)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {},
   "outputs": [],
   "source": [
    "# n_clusters=8, init=’kmeans++’, n_init=10, m ax_iter=300, \n",
    "# tol=0.0001, precompute_distances=’auto’, verbose=0,\n",
    "# rando m_state=None, copy_x=True, n_jobs=1, algorithm=’auto’\n",
    "# batch_size =50\n",
    "# init='k-means++', n_clusters=3, batch_size=batch_size,\n",
    "#                       n_init=10, max_no_improvement=10, verbose=0\n",
    "# mbk = MiniBatchKMeans(init='k-means++',n_clusters=3, batch_size=batch_size, random_state=0)\n",
    "# mbk.fit(eventCLists)\n",
    "# 设置超参数搜索范围\n",
    "Ks = [10,20,30,40,50,60,70,80,90,100]\n",
    "\n",
    "# 将聚类部分的操作封装成函数，方便循环调用\n",
    "def KMeans_cluster(K, data, CH_scores):\n",
    "    km = MiniBatchKMeans(n_clusters = K)\n",
    "    km.fit(data)\n",
    "    \n",
    "    #预测结果\n",
    "    cluster_result = km.predict(data)\n",
    "    #评估预测结果\n",
    "    #得分越大越好\n",
    "    CH_score = metrics.silhouette_score(data,cluster_result)\n",
    "    CH_scores.append(CH_score)\n",
    "    \n",
    "    \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.40450455552360676, 0.29093250643592505, 0.17278801072448075, 0.17088536221521564, 0.1358672930718682, 0.09450518341168881, 0.12029315671127545, 0.06504510276580232, 0.06863585447188265, 0.10407705173799987]\n",
      "[0.13187989689215876, 0.03699923246559147, 0.0508192610183111, 0.027112126656295688, -0.0007000435905455075, 0.014938998927006338, -0.009017399857857227, 0.03856212776765369, 0.020514433150041356, 0.007547978027438839]\n"
     ]
    }
   ],
   "source": [
    "eventCLists_matrix = np.matrix(eventCLists)\n",
    "CH_scores = []\n",
    "CH_normalize_scores = []\n",
    "for i in Ks:\n",
    "    KMeans_cluster(i, eventCLists, CH_scores)\n",
    "    KMeans_cluster(i, eventContMatrix, CH_normalize_scores)\n",
    "#得分越大越好\n",
    "print(CH_scores)\n",
    "print(CH_normalize_scores)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD8CAYAAACMwORRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3Xd4VMX+x/H3pAGBANKDgIA0c+lJKIpUlaZ49YKIgvxQqlIUxYuKEsDQbCAgKEUEC03UUEUEJCot9A6hdyJcSgjp8/tjEpNgQhaym5Pd/b6eZx/c3cM536yHz05m5sxRWmuEEEK4Fg+rCxBCCGF/Eu5CCOGCJNyFEMIFSbgLIYQLknAXQggXJOEuhBAuSMJdCCFckIS7EEK4IAl3IYRwQV5WHbhEiRK6YsWKVh1eCCGc0tatW//SWpfMbjvLwr1ixYpERERYdXghhHBKSqkTtmwn3TJCCOGCJNyFEMIFSbgLIYQLknAXQggXJOEuhBAuSMJdCCFckIS7EEK4IJvCXSnVRil1UCkVqZQaepvtOiqltFIqyH4lZrT34l7eWv0WcntAIYTIWrbhrpTyBKYAbYEAoItSKiCT7fyAgcAmexeZ3uqjqxn7x1gW7lvoyMMIIYRTs6Xl3gCI1Fof1VrHA/OAJzPZbhQwHoi1Y33/0L9BfwL9Axm0chBXYq848lBCCOG0bAn3e4FT6Z6fTnntb0qpekB5rfXS2+1IKdVbKRWhlIqIioq642IBPD08+fzxz7l44yLv/PrOXe1DCCFcnS3hrjJ57e8Ob6WUB/AJ8Hp2O9Jaf6G1DtJaB5Usme26N1kKLBvIgAYDmBoxlU2nHdoLJIQQTsmWcD8NlE/3vBxwNt1zP6AmsE4pdRxoBIQ5clAVYFSLUZT1K0vvpb1JSEpw5KGEEMLp2BLuW4CqSqlKSikf4FkgLPVNrfVVrXUJrXVFrXVFYCPQQWvt0CUf/fL5MantJHZd2MXETRMdeSghhHA62Ya71joR6A/8DOwHFmit9yqlRiqlOji6wNv5d41/80S1Jxi+bjgnrti0CqYQQrgFZdV88aCgIG2P9dxPXj1JwJQAWlRqQdizYSiV2RCBEEK4BqXUVq11tt3eTn+FaoUiFRjRfARLDy3lhwM/WF2OEELkCU4f7gCDGg2iTuk6DFwxkGtx16wuRwghLOcS4e7l4cUXT3zB2etneXfNu1aXI4QQlnOJcAdocG8D+gX1Y/KWyUSclXuzCiHcm8uEO8DoVqMpVbAUfZb2ITE50epyhBDCMi4V7kXyF2Fim4lsO7eNKZunWF2OEEJYxqXCHaBTQCfaVmnLsLXDOH3ttNXlCCGEJVwu3JVSTGk3haTkJAauGGh1OUIIYQmXC3eASvdUYniz4fxw4AfCDoZl/xeEEMLFuGS4AwxuPJiapWrSf3l/ouOjrS5HCCFylcuGu7enN58//jmnrp0iZF2I1eUIIUSuctlwB3iw/IP0rt+bCRsnsOP8DqvLEUKIXOPS4Q4w9pGxFPctTp+lfUhKTrK6HCGEyBUuH+73FLiHT1p/wuYzm5kWMc3qcoQQIle4fLgDdKnZhUcrP8rba97m7PWz2f8FIYRwcm4R7kopPmv/GXGJcby68lWryxFCCIdzi3AHqFKsCsOaDmPhvoWsOLzC6nKEEMKh3CbcAYY8OIQHSjzAy8tfJiYhxupyhBDCYdwq3PN55WPa49M4fuU4I38baXU5QgjhMG4V7gBN72vKi3Vf5KMNH7H7wm6ryxFCCIdwu3AHGP/oeIrmL0qfpX1I1slWlyOEEHbnluFe3Lc4Hz32ERtOb2DGthlWlyOEEHbnluEO0K12N1pUbMF/V/+XC9EXrC5HCCHsym3DXSnF1PZTiUmIYfCqwVaXI4QQduW24Q5QvUR13mryFt/u/pZVR1ZZXY4QQtiNW4c7wNAmQ6lWvBovL3uZmwk3rS5HCCHswu3DPb9Xfqa1n8aR/x0hNDzU6nKEEMIu3D7cAVpUasELdV5g/B/j2R+13+pyhBAixyTcU3z46If45fOTue9CCJcg4Z6iZMGSjH9kPOEnw5m9Y7bV5QghRI5IuKfTo14PHq7wMEN+GULUjSiryxFCiLsm4Z6Oh/Jg2uPTuB53nTd+ecPqcoQQ4q5JuN8ioGQAbz70JnN2zmHtsbVWlyOEEHdFwj0T7zz8Dvffcz99l/UlLjHO6nKEEOKOSbhnooB3AT5r/xmHLh1i7O9jrS5HCCHumIR7Fh67/zG61OzC6N9Hc/Cvg1aXI4QQd0TC/TY+bv0xvt6+vLz8ZbTWVpcjhBA2k3C/jTKFyjC21VjWHFvD17u+trocIYSwmYR7NnoF9qJxucYMXjWYSzGXrC5HCCFsYlO4K6XaKKUOKqUilVJDM3m/r1Jqt1Jqh1Lqd6VUgP1LtYaH8uDzxz/nSuwV/rv6v1aXI4QQNsk23JVSnsAUoC0QAHTJJLy/1VrX0lrXBcYDH9u9UgvVKl2L1xu/zsztMwk/EW51OUIIkS1bWu4NgEit9VGtdTwwD3gy/QZa62vpnhYEXG708b1m71GxaEX6LO1DfFK81eUIIcRt2RLu9wKn0j0/nfJaBkqpV5RSRzAt94GZ7Ugp1VspFaGUioiKcq61W3y9fZnSbgr7/9rPB398YHU5QghxW7aEu8rktX+0zLXWU7TW9wP/BYZltiOt9Rda6yCtdVDJkiXvrNI8oF3VdnQK6MT74e8TeTnS6nKEECJLtoT7aaB8uuflgLO32X4e8O+cFJWXTWgzAR9PH15eJnPfhRB5ly3hvgWoqpSqpJTyAZ4FwtJvoJSqmu5pe+Cw/UrMW8r6lWV0y9H8cvQX5u2ZZ3U5QgiRqWzDXWudCPQHfgb2Awu01nuVUiOVUh1SNuuvlNqrlNoBDAa6O6ziPKBvUF8a3NuA135+jauxV60uRwgh/kFZ1bUQFBSkIyIiLDm2PWw9u5Xg6cEMbDiQCW0mWF2OEMJNKKW2aq2DsttOrlC9S4FlA+kT2IfJmyez+8Juq8sRQogMJNxzILRVKEXzF+WV5a/I4KoQIk+RcM+BYgWKMfaRsYSfDOeb3d9YXY4QQvxNwj2HXqz3Ig3ubcCQX4ZwLe5a9n9BCCFygYR7DnkoD6a0m8KF6AuErAuxuhwhhAAk3O0iqGwQvQN78+mmT2VwVQiRJ0i420loy1CK5C9C/xX9ZXBVCGE5CXc7Ke5bnDGtxrD+xHq+2/Od1eUIIdychLsdvVTvJYLLBvP6qtdlcFUIYSkJdzvy9PD8e3B1xLoRVpcjhHBjEu52FnxvMD3r92TiponsvbjX6nKEEG5Kwt0BRrcaLYOrQghLSbg7QAnfEoxuOZp1x9fJssBCCEtIuDtIz/o9CfQP5I1f3uB63HWryxFCuBkJdwdJHVw9e/0sI38baXU5Qgg3I+HuQA3LNaRnvZ5M2DSBfVH7rC5HCOFGJNwdbMwjY/Dz8aP/chlcFULkHgl3ByvhW4LQlqGsPb6WBXsXWF2OEMJNSLjngt6BvanvX5/BqwbL4KoQIldIuOeC9IOro9aPsrocIYQbkHDPJY3KNeLFui/yycZP2B+13+pyhBAuTsI9F419ZCyFfArJlatCCIeTcM9FJQuWJLRlKGuOrWHhvoVWlyOEcGES7rmsT2Af6pWpx+CfBxMdH211OUIIFyXhnstSB1fPXD/DqN9kcFUI4RgS7hZoXL4xPer24OONH3PgrwNWlyOEcEES7hZJHVwdsGKADK4KIexOwt0ipQqWYlSLUaw+uppF+xZZXY4QwsVIuFuob1Bf6papy+BVMrgqhLAvCXcLeXl4MaXdFE5fO03o+lCryxFCuBAJd4s9WP5ButfpzkcbPuLgXwetLkcI4SIk3POAcY+Mw9fbVwZXhRB2I+GeB5QuVJpRLUbxy9FfWLx/sdXlCCFcgIR7HtEvuB+1S9fmtZ9f40b8DavLEUI4OQn3PCJ1cPXUtVOEhsvgqhAiZyTc85AmFZrwQp0X+PDPDzl06ZDV5QghnJiEex4z/pHxFPAuIIOrQogckXDPY0oXKs3I5iNZdWQVPxz4wepyhBBOyqZwV0q1UUodVEpFKqWGZvL+YKXUPqXULqXUr0qp++xfqvt4pcEr1CpVi9d+fo2YhBiryxFCOKFsw10p5QlMAdoCAUAXpVTALZttB4K01rWBRcB4exfqTlIHV09ePcno8NFWlyOEcEK2tNwbAJFa66Na63hgHvBk+g201mu11qlNzI1AOfuW6X4evu9hutbuygd/fsDhS4etLkcI4WRsCfd7gVPpnp9OeS0rLwErclKUMMY/Mp58nvkYuHKgDK4KIe6ILeGuMnkt06RRSnUFgoAPsni/t1IqQikVERUVZXuVbsrfz5+RLUayMnIlPx38yepyhBBOxJZwPw2UT/e8HHD21o2UUo8A7wAdtNZxme1Ia/2F1jpIax1UsmTJu6nX7fRv0J+apWoyaOUgGVwVQtjMlnDfAlRVSlVSSvkAzwJh6TdQStUDPscE+0X7l+m+0g+ujgkfY3U5QggnkW24a60Tgf7Az8B+YIHWeq9SaqRSqkPKZh8AhYCFSqkdSqmwLHYn7kLT+5ryXK3nGP/neCIvR1pdjhDCCSirBuqCgoJ0RESEJcd2Rmevn6XG5Bo0qdCEZc8tQ6nMhkKEEK5OKbVVax2U3XZyhaqTKOtXlpDmIayIXEHYQfnFSAhxexLuTmRAgwH8q+S/ZHBVCJEtCXcn4u3pzeR2kzlx9QRjfx9rdTlCiDxMwt3JNK/YnC41uzD+j/EcuXzE6nKEEHmUhLsT+vCxD/H29JYrV4UQWZJwd0Jl/coyvNlwlh9ezpJDS6wuRwiRB0m4O6lBDQcRUDKAQSsHcTPhptXlCCHyGC+rCxB3x9vTm8ltJ9NyTksemvUQZf3Kks8rH/k885HPKx8+Hj4Znmf2p4/n7bfx8fTJ9DUPJW0CIfI6CXcn1qJSC0JbhrL00FLORZ8jLjGOuKQ44hLjiE+K//u/45LiSNbJdjuut4f3P74gCngVoG2Vtgx5aAhlCpWx27GEEHdHrlB1E4nJiRnCP6svgbvaJimOyzcvszJyJT6ePvQN7MubD72Jv5+/1T+2EC7H1itUpeXuJrw8vPDy8aIgBR12jMOXDhMaHsqkzZOYGjGV3oG9+e9D/+Xewrdb/l8I4QjSeSrspmrxqsz+92wO9j/I87WeZ2rEVCp/Wpn+y/tz6uqp7HcghLAbCXdhd/cXu5+ZT87kUP9DdK/Tnc+3fk6VSVXot7QfJ6+etLo8IdyChLtwmEr3VOKLJ74gckAkPer2YOb2mVT5tAp9lvTh+JXjVpcnhEuTcBcOd1/R+5j2+DQiB0bSq34vZu+cTdVJVekZ1pOj/ztqdXlCuCQJd5FrKhSpwJT2Uzgy8Ah9A/vy9a6vqTapGj1+6iE3IRHCziTcRa4rV7gck9pN4uigo/Rv0J95e+ZRY3INuv/YnUOXDlldnhAuQcJdWKasX1kmtJnA0YFHGdhwIAv3LuSBKQ/QdXFXDvx1wOryhHBqEu7Ccv5+/nzc+mOODTrG4EaD+eHADwRMCeC5759jX9Q+q8sTwilJuIs8o3Sh0nzw2AccG3SMIQ8OIexgGDU/q0nnRZ3Zc3GP1eUJ4VQk3EWeU6pgKcY9Oo7jrx5naJOhLD+8nFpTa9FxQUd2XdhldXlCOAUJd5FnlfAtwehWozk+6DjvPPwOq46sos60Ojw9/2l2nN9hdXlC5GkS7iLPK+5bnPdbvs+JV0/wXtP3WHNsDfU+r8eT855k69mtVpcnRJ4k4S6cxj0F7mFEixEcf/U4I5qPYP2J9QRND+KJ755gy5ktVpcnRJ4i4S6cTtH8RXmv2XscH3ScUS1G8cfJP2gwowHtvmnHptObrC5PiDxBwl04rSL5izCs6TCOv3qc0S1Hs/nMZhrNbESnhZ04ceWE1eUJYSnnC/eEBPjjD6urEHlI4XyFeevhtzg26Bgjmo9g2aFl1JhSg5B1IcQkxFhdnhCWcL5wDwmBFi1gi/Sxioz88vnxXrP3OND/AB2qd2DEbyN4YMoDLNq3CKvuOCaEVZwv3F9/Hfz9oXNnuHLF6mpEHlShSAXmd5zPuu7rKJq/KJ0WdqLVnFbsvrDb6tKEyDXOF+7FisG8eXDqFPTsCdIiE1loVrEZW3tv5bN2n7Hzwk7qfV6PAcsHcPnmZatLE8LhnC/cARo3htGj4fvvYepUq6sReZiXhxf9gvtxqP8h+gT24bOIz6g2qRrTIqaRlJxkdXlCOIxzhjuY7pl27eC112D7dqurEXlccd/iTGk/he19tlOzVE36LetH0PQgwk+EW12aEA7hvOHu4QFffQUlS8Izz8C1a1ZXJJxA7dK1Wdt9LfM7zudSzCWazm5Kl++7uNwNvM9Hn+eTDZ8Quj6Umwk3rS5HWEBZNYsgKChIR0RE5HxHv/8OzZtDp07w7begVM73KdxCTEIM434fx7g/xuHp4cnbTd7m9QdfJ79XfqtLuysxCTH8dOAn5uyaw6ojq0jWyQDUK1OP75/5nkr3VLK4QmEPSqmtWuug7LZz3pZ7qiZNYORIM8g6fbrV1Qgn4uvty4gWI9j/yn7aVGnDsLXDCJgSwI8HfnSaqZPJOpk1x9bQ46celPmwDM8tfo69F/cy9KGh7H9lP0u6LOHYlWMEfhHIisMrrC5X5CLnb7kDJCdD27awfj1s2gS1a9tnv8Kt/Hr0VwatHMTeqL08WvlRJraZyAMlH7C6rEzti9rH3J1z+Wb3N5y6dgo/Hz86BXSiW51uNL2vKR4qrd125PIR/rPgP+y6sIuQ5iEMazosw/vCudjacneNcAe4eBHq1oXChSEiAgoVst++hdtISEpgasRUhq8bTnR8NP2D+zO8+XCK5i9qdWlcvHGR73Z/x9xdc9l6biueypPWVVrTrXY3OlTvgK+3b5Z/NyYhhr5L+zJ311zaVW3H1099zT0F7snF6oW9uF+4A6xbB61awXPPwZw50v8u7lrUjSiGrRnG9G3TKeFbgjGtxtCjXo9cb/HeTLhJ2MEw5u6ay8rIlSTpJOr716db7W50qdmF0oVK27wvrTXTIqYxaOUgyhUux/fPfE89/3oOrF44gnuGO5j+9+HDYeZMePFF++9fuJVt57YxYMUA/jz1J4H+gUxqO4nG5Rs79JjJOpnwE+HM3TWXhfsWci3uGuUKl+P5Ws/TrXY3/lXqXzna/8bTG+m4oCOXbl5iWvtpdK/b3U6Vi9xga7ijtc72AbQBDgKRwNBM3m8KbAMSgY627DMwMFA7RGKi1i1bal2ggNZ79jjmGMKtJCcn6292faPLflRWE4LutribPnPtjN2PcyDqgH7n13f0fZ/cpwlBFxpdSHf/obtefWS1TkxKtOuxLkRf0C1mt9CEoPst7adjE2Ltun+RtdiEWJ2UnHTXfx+I0LbkdrYbgCdwBKgM+AA7gYBbtqkI1AbmWB7uWmt97pzWpUtrHRCgdXS0444j3Mr1uOv6rdVvaZ9RPrrQ6EJ6bPjYHIdi1I0oPWnTJB38RbAmBO0xwkO3nttaf73zax0d59hzNyEpQb+56k1NCLrB9Ab65JWTDj2eu4tNiNVTt0zV5T8urxfvW3zX+7E13G3pQGwARGqtj2qt44F5wJO3tP6Pa613Ack27M/xypSBb76B/fthwACrqxEuopBPIUa3Gs2+l/fRslJLhv46lJpTa7Ls0LI72k9sYiwL9y6kw3cd8P/InwErBhCfFM9Hj33E6ddOs7LrSp6v/TwFfQo66CcxvDy8GPfoOBZ1WsT+qP3U/6I+vx791aHHdEdxiXFMi5hG1UlV6besH+WLlL+jsZK7ll36Ax2BGemedwMmZ7HtbPJCyz3Vu++aX07mzHH8sYTbWXl4pa4+qbomBN3267b6QNSBLLdNTk7W64+v173CeukiY4poQtD+H/rrIauG6F3nd+Vi1Zk7EHVAB0wJ0B4jPPTY8LE6OTnZ6pKcXlxinJ62ZZou/3F5TQi68YzGelXkqhx/ttixW6ZTJuE+KYttbxvuQG8gAoioUKFCjn5AmyQmat2smdYFC2q9f7/jjyfcTlxinP7oz4904TGFtfdIbz1k1RB9Nfbq3+8f+uuQfnfNu7rShEqaELRvqK/utribXhW5yu796Dl1Pe667rywsyYE/dS8pzL8HMJ2qaFe4ZMKmhB0oxmN9M+RP9vtC9PWcM92toxSqjEQorVunfL8rZQW/5hMtp0NLNVaL8ruNwaHzZa51ZkzZv67v7+5wKlAAccfU7idC9EXePvXt5m1YxZlCpWhZ72erD62mo2nN6JQtKrcihdqv8BTDzxFIZ+8ew2G1pqJmybyxqo3uL/Y/Sx+ZnGOZ+e4i/ikeGbvmE1oeCgnr56kUblGjGg+gkcrP4qy47Rsu82WAbyAo0Al0gZU/5XFtrPJS90yqVasML+k9OqVe8cUbmnT6U264fSGmhB0zc9q6vG/j9enr562uqw7tv74el36g9LaN9RXf7f7O6vLydPiEuP05xGf/91Sbzi9oV55eKXDurawV7eM2RftgEOYWTPvpLw2EuiQ8t/BwGngBnAJ2JvdPnM13LXWeuhQ8+N++23uHle4naTkJH322lmn77c+c+2MfmjmQ5oQ9KsrXtXxifFWl5SnxCXG6S8ivvh76qqjQz2VreHuehcxZSUx0aweuXMnbNsGVavm3rGFcFIJSQkM+WUIEzdNpEmFJizouAB/P3+ry7JUfFI8X+34itDwUE5cPUHDexsS0jyE1ve3tmv3S1bcZ1VIW3l5wXffgY+PWf89NtbqioTI87w9vZnQZgLfPv0t285to/4X9d32BicJSQnM2DaD6pOr03tpb0oXKs2K51ew4aUNtKnSJleC/U64T7gDlC9v1pzZscPcyUkIYZMutbqwqecm/Hz8aDmnJRM3TsSq3/pzW2qoV5tcjV5LelGqYCmWP7ecjS9tzJOhnsq9wh2gfXt44w347DNYlO2kHiFEipqlarKl1xbaV23Pqz+/SpfvuxAdH211WQ6TkJTAzG0z/w71kr4l/w71tlXb5tlQT+U+fe7pJSRA06awb5/pf7//fmvqEMIJJetkxv8xnnfWvEONEjVY/MxiqpeobnVZdpOQlMCcnXN4P/x9jl85TnDZYEKah9C2St4IdOlzvx1vb3PnJg8P6NwZ4uKsrkgIp+GhPBjaZCiruq7i4o2LBE8P5of9P1hdVo4lJCUwa/ssqk+uTs8lPSnhW4Jlzy1jU89NtKvaLk8E+51wz3AHuO8++PJL2LoV3nzT6mqEcDqtKrdia++t1ChRg6cXPM3Q1UNJTE60uqw7lj7UXwp7ieK+xVnaZSmbe252ylBP5b7hDvDvf8OgQfDpp/CD87c8hMhtFYpUILxHOH0C+zDuj3G0/ro1F29ctLosm9wa6sUKFGNJlyVs7rmZ9tXaO22op3LPPvf04uPhoYcgMhK2b4eKFa2uSAinNHvHbPot60cJ3xIs6rSIhuUaWl1SphKSEvh619e8H/4+R/93lED/QEKah9C+qnMEuvS528rHB+bPNzfZ7tzZhL0Q4o79X93/488X/8Tbw5uHv3yYaRHT8tR0yYSkBL7c/iU1ptTgxbAXKZq/KGHPhrGl1xYer/a4UwT7nfCyuoA8oXJlmDULOnaEt9+GDz+0uiIhnFI9/3pE9I6g6+Ku9FvWjw2nNzC1/dTb3rw7O3GJcVyPv871uOu2/ZnFe1dir3Aj4Qb1/esT9myYSwZ6etItk17//jBlCixZAo8/bnU1QjitZJ3MyN9GMvK3kdQuXZvQlqHEJ8XfURin/pmQnGDTMX08ffDz8cMvn1/mf/r40apyK56o9oRTh7r73iA7J2Jj4cEH4cQJcxVr+fJWVySEU1t+eDnPL36eK7FX/vGep/LMOojTBXIhn0LZb5fPDx9PHwt+wtwn4X63IiOhfn2oVQvWrTNz4oUQd+3ijYsc/OvgPwI5v1d+p25BW8XWcJc+91tVqQLTp8Ozz8K778LYsVZXJIRTK1WwFKUKlrK6DLcjs2Uy07kz9OkD48bBihVWVyOEEHdMwj0rn3wCtWvDCy+YW/UJIYQTkXDPSoECsGAB3LwJXbqYm30IIYSTkHC/nerVYdo0CA+HESOsriajxETYswfmzjVdR3LxlRAiHRlQzU7XrrB2LYSGQrNm8MgjuV9DXBzs3WuWJ0597NyZ8W5SRYvCU09Bp07QqpW58lYI4bZkKqQtYmKgQQOIijLz3/0deA/JmBjYtStjkO/ZY9agByhSxEzVTH3UrQvHjpkupB9/hGvX4J57TNA/8wy0bCnTOYVwITLP3d727YPgYGjYEH75BTw9c77Pa9fMl0X6IN+/36xzA1CiRMYgDwyESpUgq7nBcXGmtgUL4KefzP6LFUsL+hYtJOiFcHIS7o4wezb06AEhITB8+J393UuXzKqT6YP88OG098uWzRjk9etDuXJZB3l2YmNh1aq0oI+OhuLF4emnTdA3b25uGi6EcCoS7o7SvbsZxPz1V9MSzsz58xlDfNs2s6RBqooVM4Z4vXpQpozjao6NhZ9/NkEfFmaCvkSJtKBv1kyCXggnIeHuKNHRpnvmyhXTpRIX988gP3cubftq1f4Z5MWKWVf/zZuwciUsXGiC/sYNKFkS/vMfMxjbrJl9upyEANDaLKl97Jg5t4KDpWswhyTcHWn3bjPAmpSUNtDp4QEBARmDvE4dKFzY2lpv5+ZNM41ywQKzEmZMDJQqZYL+mWfg4Ycl6MXdO3MGevXKeJV3wYLQpIkZ6G/RwjR25LfGOyLh7mg//mhO2rp10xYa8737NastFxOTFvRLl5rnpUunBX2TJhL0wjZam67LgQNN42fcOLOkR3g4rFljphbv22e2LVzYtOhbtDCP2rVNQ0lkScJd3L0bN2D5chP0y5aZFn6ZMuZmJp06mdsSStCLzJw7Z9ZlWrLENAi+/NIsxner8+fNqqtr15pH6uSCYsXMYH9q2AcE3P2kAhcl4S7sIzo6Y9DHxpp5/h07mhb9gw9KS0uY1vp335kb3ty8CWPGwIABtjcCTp9OC/o1a9ImIJQunRb2LVuaLwo3D3sJd2F/0dGmy2bBAtOFExuowLgqAAAK/0lEQVRrpnCmBn3jxhL07ujiRejXDxYvhkaNzJTh6tVzts9jxzKG/dmz5vV7700L+hYt3PKG9hLuwrGuX88Y9HFxcP/98NJL8H//59ireEXesXAhvPyyOR9GjYLBg+3fZae16bZJDfq1a83V4mDCPTXoW7Qw4e/iJNxF7rl2zVwoNWuW6Uf19DT3oO3VC9q0kf55V/TXX/DKK+bLPTjYtNYDAnLn2FqbAdnUoF+3Dv73P/Ne1appYd+8uenWcTES7sIahw/DjBnmH/vFi6Yl9eKLpkV/331WVyfs4YcfoG9fE6ghIfDmm9ZOZ0xONgvppXbjrF9vGhwA//pXWqu+WTNzlbaTk3AX1kpIMDMmZswwF00BPPYY9OwJHTrIqpXO6PJlM73xm2/M/PSvvjJTgPOaxERzMWFq2IeHm6m9SpmLCmvXNtegpD5yssyHBSTcRd5x8qTpspk1C06dMlfEdu9ugj6nA28idyxdarrZ/vrL3Fv4rbec50rT+HjYssUE/datZtXVo0fT3i9W7J+BHxAA+fNbV/NtSLiLvCcpySxmNn26adUnJpqrYHv1MjNuChSwukL7OHXKfKEFBzv/byhXrsCrr6a10r/6yrTand21a+ZK85070x67d5sWPphxoho1/hn6ZcpY3sqXcBd52/nzJihmzIDISLNOfdeuJujr1LG6OtvFxpougA0bzGPjxrR77jr7uvorVpj/H+fPm5b6u+86/5fV7SQlwZEjGQN/507zZZ2qZMmMYV+njvkSyMXPRcJdOAet4bffTGv+++/NlMrgYNNl06UL+PlZXWEarc3FNRs3pgX59u1p6wtVqmTmeTdubFp4S5aYZSquXzcDealLOTRvnrdnEF29Cq+/DjNnmu6Jr76CoGyzxHVdvmy6clLDftcucwOduDjzvre3+ZzSB37t2uaLwAEk3IXzuXwZvv7aBP2ePWaRqWefNa3HBg1y/9fhmzchIiItyDdsMK1YMF1IwcEmyBs1Mo/Mlm2OjTUDyqnLLd+4YRZn69jRrLeS15ZyWL3azG46c8bMghk+PM/2PVsqMREOHfpnKz/9irD+/v9s5VerluOZRRLuwnlpDZs3m5CfN88EYs2aJuS7dnXMkslam6si0wf5zp3mHzGYC7QaN04L81q17rybJSbGLOUwf37amj3+/ma9ns6dzX6tusL3+nUT5tOmmUHu2bNNPeLOREX9M/D370/77S5/fjM98513TJfdXbBruCul2gATAU9ghtZ67C3v5wPmAIHAJaCz1vr47fYp4S5scv26Cfjp082Mh3z5TPdGz56me+NuW/M3bqS1ylMD/eJF817BguY3hfStcnv/ip26lMP8+WlX+JYvnxb0wcG595vK2rWmtX7ihLnCdNQo1xnczgvi403Ap3bp7NwJr70G7drd1e5sDXe01rd9YAL9CFAZ8AF2AgG3bPMyMC3lv58F5me338DAQC3EHdmxQ+v+/bUuWlRr0LpKFa3HjtX63Lnb/73kZK0PH9Z6zhyt+/XTul49rT09zT5A62rVtO7eXeupU80xEhJy5cf529WrWs+dq/Xjj2vt7W1qqlhR6zff1HrrVlO/I0RHm88z9bP8/XfHHEfYFRChs8lXrXX2LXelVGMgRGvdOuX5WylfCmPSbfNzyjYblFJewHmgpL7NzqXlLu7azZtm8HX6dHM1opcXPPGE6bZ57DHz/ubNaS3yjRvN/GwwA7QNG6YNfDZsmLeuWrxyxQzCzp9v+r8TE81KiM88Y1r0tWrZp0UfHm7uB3zkiLkwacwY574fgRuxW7eMUqoj0EZr3TPleTegoda6f7pt9qRsczrl+ZGUbf7Kar8S7sIuDh40szpmzzb9ncWKmYBMTjbvP/BAWpA3bmye56UBzNu5dMlc6j9/vllHJTnZTLtLDfq7WcslJgaGDYMJE8yiW19+aS7LF07DnuHeCWh9S7g30FoPSLfN3pRt0od7A631pVv21RvoDVChQoXAE+lvGi1ETsTHm6mHYWFmSmLjxqbf/J57rK7MPi5eNEvqzp9vpo5qbQaZU4O+WrXs97Fhg1mx89Ahs5LjuHFQqJDDSxf2Zc9wl24ZIfKS8+dh0SIT9L//bl6rWzct6CtXzrh9bCy89x589JFZR2XWLGjVKvfrFnZha7jbMu9qC1BVKVVJKeWDGTANu2WbMKB7yn93BNbcLtiFEDlQpoy541F4uLmD0SefmCl2b79tpmwGB8MHH5jZL5s3m3v8fvCBmWG0e7cEu5uwdSpkO2ACZubMLK11qFJqJGbUNkwplR+YC9QDLgPPaq2PZr1HabkLYXcnTpibZ8yfb6Z5ghl8LVvWjEu0bm1tfcIu5CImIdzZ0aPmqtjoaHjjDSha1OqKhJ3YGu4WrrAvhHCYypVh6FCrqxAWkrsZCyGEC5JwF0IIFyThLoQQLkjCXQghXJCEuxBCuCAJdyGEcEES7kII4YIk3IUQwgVZdoWqUioKcPZlIUsAWS5r7Ibk80gjn0VG8nlklJPP4z6tdba3BrMs3F2BUirClsuA3YV8Hmnks8hIPo+McuPzkG4ZIYRwQRLuQgjhgiTcc+YLqwvIY+TzSCOfRUbyeWTk8M9D+tyFEMIFSctdCCFckIS7jZRS5ZVSa5VS+5VSe5VSg1JeL6aU+kUpdTjlTxe5I3P2lFKeSqntSqmlKc8rKaU2pXwW81Nuy+gWlFJFlVKLlFIHUs6Rxu56biilXkv5N7JHKfWdUiq/O50bSqlZSqmLSqk96V7L9FxQxqdKqUil1C6lVH171SHhbrtE4HWt9QNAI+AVpVQAMBT4VWtdFfg15bm7GATsT/d8HPBJymfxP+AlS6qyxkRgpda6BlAH87m43bmhlLoXGAgEaa1rYm7N+SzudW7MBtrc8lpW50JboGrKozcw1W5VaK3lcRcP4CfgUeAg4J/ymj9w0OracunnL5dykrYElgIKc1GGV8r7jYGfra4zlz6LwsAxUsaw0r3uducGcC9wCiiGudPbUqC1u50bQEVgT3bnAvA50CWz7XL6kJb7XVBKVcTcDHwTUFprfQ4g5c9S1lWWqyYAbwLJKc+LA1e01okpz09j/qG7g8pAFPBlSjfVDKVUQdzw3NBanwE+BE4C54CrwFbc99xIldW5kPplmMpun42E+x1SShUCvgde1Vpfs7oeKyilHgcuaq23pn85k03dZSqWF1AfmKq1rgfcwA26YDKT0pf8JFAJKAsUxHQ93Mpdzo3sOOzfjYT7HVBKeWOC/Rut9eKUly8opfxT3vcHLlpVXy56COiglDoOzMN0zUwAiiqlUm+6Xg44a015ue40cFprvSnl+SJM2LvjufEIcExrHaW1TgAWAw/ivudGqqzOhdNA+XTb2e2zkXC3kVJKATOB/Vrrj9O9FQZ0T/nv7pi+eJemtX5La11Oa10RM1i2Rmv9PLAW6JiymVt8FgBa6/PAKaVU9ZSXWgH7cMNzA9Md00gp5Zvybyb1s3DLcyOdrM6FMOCFlFkzjYCrqd03OSUXMdlIKdUECAd2k9bP/Dam330BUAFzYnfSWl+2pEgLKKWaA29orR9XSlXGtOSLAduBrlrrOCvryy1KqbrADMAHOAr0wDSe3O7cUEqNADpjZphtB3pi+pHd4txQSn0HNMes/HgBGA78SCbnQsoX4GTM7JoYoIfWOsIudUi4CyGE65FuGSGEcEES7kII4YIk3IUQwgVJuAshhAuScBdCCBck4S6EEC5Iwl0IIVyQhLsQQrig/wfwTDi+X4W3JQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# 绘制评分结果\n",
    "plt.plot(Ks, CH_scores,color='green')\n",
    "plt.plot(Ks, CH_normalize_scores,color='red')\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 有个疑问，不知道为什么不进行特征编码得到的评分反而会比编码后的评分更高\n",
    "#### 针对这个问题，有一个猜想，特征编码后每个元素的L2距离变近了，变得不那么容易区分了所以得分降低，如果真的是这样，那么特征编码还是否有必要"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
